/* OPTIONS OBS=500 NOREPLACE;  */

%include "ASMimplibs.sas";


/* Construct variables for production function estimation. */

%MACRO construct_pf_vars(data=,yr=);

DATA concrete_&data.&yr;
 SET concrete_&data.&yr;
  if tvs>0 then lnq = log(tvs);
  if ph > 0 then pw_wage = ww/ph;  /* pw_wage gives the average production worker wage rate */
  else pw_wage=.;                   /* ww=production worker wages; ph = production worker hours */
  if pw_wage>0
  then pwhours = sw/pw_wage;  /* "production worker equivalent" hours */
                              /* sw = total salaries and wages (all employees) */
  else pwhours = .;
  if pwhours>0 then lth = log(pwhours);  * log of total "production worker equivalent" hours;
  if te>0 then lemp = log(te);
  else lemp = .;
  energy = (sum(cf,ee));
  if energy > 0 then le=log(energy);    * log of (nominal) energy inputs;
  mat = cm - energy;
  if mat > 0 then lm = log(mat);   * log of (nominal) intermediate inputs (minus energy);
  if tab>0 then lk = log(tab);     * log of nominal book value of assets, beginning of year;
run;

%MEND;


/* Estimate production functions on a single dataset. */

%MACRO estimate_pf_single(data=,year=);

proc reg data=concrete_&data.&year;
QKLEM: model lnQ = lK lth lE lM; 
CRS: test lK+lth+lE+lM=1;
 output out=concrete_resids_&data.&year
       residual=uhat
       predicted=lnQhat;
 title1 "Production function regression on &data data";
 title2 "20&year CMF non-AR tabulated concrete mfg plants";
run;

data concrete_cs_&year;
 set indcs.cost_shares_concrete;
 if year = 20&year;
run;

/* Merge industry average cost shares with plant-level residuals data 
   and compute residuals using industry cost-shares for output elasticities.*/

data concrete_resids_&data.&year;
 if _N_ = 1 then set concrete_cs_&year;
 set concrete_resids_&data.&year;
 cs_resid =  lnQ - ((iake + iaks)*lK + ial*lth + iae*lE + iam*lM);
run; 

%MEND;



%MACRO estimate_TFP_on_single(data=,year=,var=);

proc sort data=concrete_resids_&data.&year;
 by BEA_CEA_Code;
run;


/* Keep only the plants for which we can compute the TFP residual. */
/* Set a dummy variable for counting. */
data concrete_resids_&data.&year;
 set concrete_resids_&data.&year;
 tfp_f=1;
 if &var ne . ;
run;

/** Create a count of plants with TFP within each CEA, 
    and merge the counts back with the residuals. 
**/
proc freq data=concrete_resids_&data.&year;
by BEA_CEA_Code;
 tables tfp_f / out=resid_counts_&data.&year NOPRINT;
run;


data resid_counts_&data.&year (keep = BEA_CEA_Code count);
 set resid_counts_&data.&year;
run;

data concrete_resids_&data.&year (drop = tfp_f);
 merge concrete_resids_&data.&year resid_counts_&data.&year;
 by BEA_CEA_Code;
run;

/* Compute TFP dispersion statistics for the entire ready-mix concrete industry. */


proc means data=concrete_resids_&data.&year p10 q1 q3 p90 stddev NOPRINT;
 var &var;
 output out=concrete_resids_&data.&year._iqrs (keep = ltfp_p10_&var ltfp_q1_&var ltfp_q3_&var ltfp_p90_&var ltfp_sd_&var) p10=ltfp_p10_&var q1=ltfp_q1_&var q3 = ltfp_q3_&var p90=ltfp_p90_&var stddev=ltfp_sd_&var;
run;


data concrete_resids_&data.&year._iqrs;
 set concrete_resids_&data.&year._iqrs;
 ltfp_iqr_&var = ltfp_q3_&var - ltfp_q1_&var;
 ltfp_90_10_&var = ltfp_p90_&var - ltfp_p10_&var;
 tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
 tfp_90_10_ratio_&var = exp(ltfp_90_10_&var);
run;

proc print data=concrete_resids_&data.&year._iqrs;
 var ltfp_sd_&var ltfp_iqr_&var ltfp_90_10_&var tfp_75_25_ratio_&var tfp_90_10_ratio_&var;
title1 "Measures of TFP dispersion, &data data, TFP=&var";
 title2 "&year CMF non-AR tabulated concrete mfg plants";
run;

/* Now keep only the plants in CEAs with at least 5 plants in the CEA. */
data resids5_&data.&year;
 set concrete_resids_&data.&year;
if count ge 5;
run;

proc sort data=resids5_&data.&year;
 by BEA_CEA_Code;
run;

/* Compute the 10th percentile, the interquartile range, and the median of each within-CEA TFP distribution. */
proc means data=resids5_&data.&year p10 q1 median q3  NOPRINT;
 var &var;
 by BEA_CEA_Code;
 output out=resids5_&data.&year._CEAiqrs (keep = BEA_CEA_Code ltfp_p10_&var ltfp_q1_&var ltfp_med_&var ltfp_q3_&var) p10=ltfp_p10_&var q1=ltfp_q1_&var median=ltfp_med_&var q3 = ltfp_q3_&var ;
run;


data resids5_&data.&year._CEAiqrs;
 set resids5_&data.&year._CEAiqrs;
 ltfp_iqr_&var = ltfp_q3_&var - ltfp_q1_&var;
 tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
run;

/* Construct within-CEA TVS share weights. */

proc sort data=resids5_&data.&year;
 by BEA_CEA_Code;
run;

proc means data=resids5_&data.&year SUM  NOPRINT;
 var TVS;
by BEA_CEA_Code;
 output out = CEA_tvs_tots&data.&year (keep = BEA_CEA_Code CEAtvstot ) sum=CEAtvstot ; 
run;


data resids5_&data.&year;
 merge resids5_&data.&year CEA_tvs_tots&data.&year;
by BEA_CEA_Code;
 CEAtvssh = tvs/CEAtvstot;
run;

/* Compute within-CEA tvs-share-weighted mean TFP. */

proc means data=resids5_&data.&year mean NOPRINT;
 var &var;
 weight CEAtvssh;
 by BEA_CEA_Code;
 output out=resids5_&data.&year._tfpmean (keep = BEA_CEA_Code ltfp_wtdmean_&var) mean= ltfp_wtdmean_&var;
run;

proc means data=resids5_&data.&year mean NOPRINT;
 var TVS;
by BEA_CEA_Code;
 output out=resids5_&data.&year._TVSmean (keep = BEA_CEA_Code CEAtvsmean) mean=CEAtvsmean;
run;

/*  Merge CEA tfp means with TFP IQRs, medians, and 10th percentiles. */
data resids5_&data.&year._tfp_stats;
 merge resids5_&data.&year._tfpmean resids5_&data.&year._CEAiqrs resids5_&data.&year._TVSmean;
by BEA_CEA_code;
 lnCEAtvsmean = log(CEAtvsmean);
run;

/* Merge CEA plant counts with CEA TFP stats. */
data  resids5_&data.&year._tfp_stats;
 merge resid_counts_&data.&year (in=incount) resids5_&data.&year._tfp_stats (in=in5);
by BEA_CEA_Code;
if incount and in5;
run;

/* Compute descriptive statistics for CEA TFP stats. */ 
proc means data=resids5_&data.&year._tfp_stats mean stddev qrange p10 p90;
 var ltfp_iqr_&var ltfp_med_&var ltfp_wtdmean_&var ltfp_p10_&var lnCEAtvsmean CEAtvsmean;
title1 "CEA-level TFP statistics, &data data, year=&year";
title2 "CEAs with >= 5 plants with TFP=&var";
run;

proc means data=resids5_&data.&year._tfp_stats mean min q1 median q3 max;
 var count;
title1 "CEA-level plant counts, &data data, year=&year";
title2 "CEAs with >= 5 concrete plants with TFP=&var";
run;

%MEND;


%MACRO estimate_TFP_by_CEAyear(data=,var=);

proc sort data=concrete_resids_&data;
 by year BEA_CEA_Code;
run;

/* Keep only the plants for which we can compute the TFP residual. */
/* Set a dummy variable for counting. */
data concrete_resids_&data;
 set concrete_resids_&data;
 tfp_f=1;
 if &var ne . ;
run;

/** Create a count of plants with TFP within each CEA-year cell, 
    and merge the counts back with the residuals. 
**/
proc freq data=concrete_resids_&data;
by year BEA_CEA_Code;
 tables tfp_f / out=resid_counts_&data NOPRINT;
run;

data resid_counts_&data (keep = year BEA_CEA_Code count);
 set resid_counts_&data;
run;

data concrete_resids_&data (drop = tfp_f);
 merge concrete_resids_&data resid_counts_&data;
 by year BEA_CEA_Code;
run;

/* Compute TFP dispersion statistics for the entire ready-mix concrete industry
   by year. */

proc means data=concrete_resids_&data p10 q1 q3 p90 stddev NOPRINT;
 var &var;
by year;
 output out=concrete_resids_&data._iqrs (keep = year ltfp_p10_&var ltfp_q1_&var ltfp_q3_&var ltfp_p90_&var ltfp_sd_&var) p10=ltfp_p10_&var q1=ltfp_q1_&var q3 = ltfp_q3_&var p90=ltfp_p90_&var stddev=ltfp_sd_&var;
run;


data concrete_resids_&data._iqrs;
 set concrete_resids_&data._iqrs;
 ltfp_iqr_&var = ltfp_q3_&var - ltfp_q1_&var;
 ltfp_90_10_&var = ltfp_p90_&var - ltfp_p10_&var;
 tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
 tfp_90_10_ratio_&var = exp(ltfp_90_10_&var);
run;

proc print data=concrete_resids_&data._iqrs;
 var year ltfp_sd_&var ltfp_iqr_&var ltfp_90_10_&var tfp_75_25_ratio_&var tfp_90_10_ratio_&var;
title1 "Measures of real TFP dispersion, &data data, TFP=&var";
 title2 "CMF non-AR tabulated concrete mfg plants, base year=2002";
run;

/* Now keep only the plants in CEAs with at least 5 plants in the CEA. */
data resids5_&data;
 set concrete_resids_&data;
if count ge 5;
run;

proc sort data=resids5_&data;
 by year BEA_CEA_Code;
run;

/* Compute the 10th percentile, the interquartile range, and the median of each within-CEA-year TFP distribution. */
proc means data=resids5_&data p10 q1 median q3  NOPRINT;
 var &var;
 by year BEA_CEA_Code;
 output out=resids5_&data._CEAiqrs (keep = year BEA_CEA_Code ltfp_p10_&var ltfp_q1_&var ltfp_med_&var ltfp_q3_&var) p10=ltfp_p10_&var q1=ltfp_q1_&var median=ltfp_med_&var q3 = ltfp_q3_&var ;
run;


data resids5_&data._CEAiqrs;
 set resids5_&data._CEAiqrs;
 ltfp_iqr_&var = ltfp_q3_&var - ltfp_q1_&var;
 tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
run;

/* Construct within-CEA-year TVS share weights. */

proc sort data=resids5_&data;
 by year BEA_CEA_Code;
run;

proc means data=resids5_&data SUM  NOPRINT;
 var TVS;
by year BEA_CEA_Code;
 output out = CEA_tvs_tots&data (keep = year BEA_CEA_Code CEAtvstot ) sum=CEAtvstot ; 
run;


data resids5_&data;
 merge resids5_&data CEA_tvs_tots&data;
by year BEA_CEA_Code;
 CEAtvssh = tvs/CEAtvstot;
run;

/* Compute within-CEA tvs-share-weighted mean TFP. */

proc means data=resids5_&data mean NOPRINT;
 var &var;
 weight CEAtvssh;
 by year BEA_CEA_Code;
 output out=resids5_&data._tfpmean (keep = year BEA_CEA_Code ltfp_wtdmean_&var) mean= ltfp_wtdmean_&var;
run;

proc means data=resids5_&data mean NOPRINT;
 var q;
by year BEA_CEA_Code;
 output out=resids5_&data._qmean (keep = year BEA_CEA_Code CEAyrqmean) mean=CEAyrqmean;
run;

/*  Merge CEA tfp means with TFP IQRs, medians, 10th percentiles, and mean output */
data resids5_&data._tfp_stats;
 merge resids5_&data._tfpmean resids5_&data._CEAiqrs resids5_&data._qmean;
by year BEA_CEA_code;
 lnCEAyrqmean = log(CEAyrqmean);
run;

/* Merge CEA plant counts with CEA TFP stats. */
data  resids5_&data._tfp_stats;
 merge resid_counts_&data (in=incount) resids5_&data._tfp_stats (in=in5);
by year BEA_CEA_Code;
if incount and in5;
run;

/* Compute descriptive statistics for CEA-year TFP stats. */

 
proc means data=resids5_&data._tfp_stats mean stddev qrange;
 var ltfp_iqr_&var ltfp_med_&var ltfp_wtdmean_&var ltfp_p10_&var lnCEAyrqmean CEAyrqmean;
title1 "Summary stats for within-CEA-year statistics, ";
title2 "&data data, 2002 and 2007 pooled, CEAs with >= 5 plants with TFP=&var";
run;

/* 10th percentile: */
proc means data=resids5_&data._tfp_stats p10 NOPRINT;
 var ltfp_iqr_&var ltfp_med_&var ltfp_wtdmean_&var ltfp_p10_&var lnCEAyrqmean CEAyrqmean;
output out=resids5_&data._statP10 p10(ltfp_iqr_&var)=ltfp_iqr_p10_&var p10(ltfp_med_&var)=ltfp_med_p10_&var p10(ltfp_wtdmean_&var)=ltfp_wtdmean_p10_&var p10(ltfp_p10_&var)=ltfp_p10_p10_&var p10(lnCEAyrqmean)=lnCEAyrqmeanp10 p10(CEAyrqmean)=CEAyrqmeanp10;
run;

/* 90th percentile: */
proc means data=resids5_&data._tfp_stats p90 NOPRINT;
 var ltfp_iqr_&var ltfp_med_&var ltfp_wtdmean_&var ltfp_p10_&var lnCEAyrqmean CEAyrqmean;
output out=resids5_&data._statP90 p90(ltfp_iqr_&var)=ltfp_iqr_p90_&var p90(ltfp_med_&var)=ltfp_med_p90_&var p90(ltfp_wtdmean_&var)=ltfp_wtdmean_p90_&var p90(ltfp_p10_&var)=ltfp_p10_p90_&var p90(lnCEAyrqmean)=lnCEAyrqmeanp90 p90(CEAyrqmean)=CEAyrqmeanp90;
run;

/* Merge the 10th and 90th percentile datasets to calculate 90-10 ranges. */
data resids5_&data._stat9010s;
 merge resids5_&data._statP90 resids5_&data._statP10;
 ltfp_iqr_9010_&var=ltfp_iqr_p90_&var - ltfp_iqr_p10_&var;
 ltfp_med_9010_&var=ltfp_med_p90_&var - ltfp_med_p10_&var;
 ltfp_wtdmean_9010_&var=ltfp_wtdmean_p90_&var - ltfp_wtdmean_p10_&var;
 ltfp_p10_9010_&var=ltfp_p10_p90_&var - ltfp_p10_p10_&var;
 lnCEAyrqmean9010 = lnCEAyrqmeanp90 - lnCEAyrqmeanp10;
 CEAyrqmean9010 = CEAyrqmeanp90 - CEAyrqmeanp10;
run;

proc means data=resids5_&data._stat9010s mean;
 var ltfp_iqr_9010_&var ltfp_med_9010_&var ltfp_wtdmean_9010_&var ltfp_p10_9010_&var lnCEAyrqmean9010 CEAyrqmean9010;
title1 "90-10 ranges of within-CEA-year level statistics, ";
title2 "&data data, 2002 and 2007 pooled, CEAs with >= 5 plants with TFP=&var";
run;

proc means data=resids5_&data._tfp_stats mean min q1 median q3 max;
 var count;
title1 "CEA-year plant counts, &data data, 2002 and 2007 pooled";
title2 "CEA-years with >= 5 concrete plants with TFP=&var";
run;

%MEND;


%MACRO Construct_CEA_imp_rate(year=);

/* Merge the survu_id's from the complete-cases data with those
   from the Bureau-completed data and compute imputation rates
   by CEA.  Then merge the imputation rates back with the 
   residuals from the TFP residuals in the Bureau-completed data. 
*/

data complete_data_&year (keep = survu_id);
set resids5_good&year;
run;
proc sort data=complete_data_&year;
 by survu_id;
run;

data allplants_&year (keep = survu_id BEA_CEA_Code);
set resids5_CB&year;
run;
proc sort data=allplants_&year;
 by survu_id;
run;

data allplants_&year;
merge allplants_&year (in=inCB) complete_data_&year (in=ingood);
by survu_id;
if inCB and ingood then imp=0;
else if inCB then imp=1;
run;

proc sort data=allplants_&year;
 by BEA_CEA_Code;
run;

proc means data=allplants_&year mean noprint;
 var imp;
by BEA_CEA_Code;
output out=CEA_imp_rates&year (keep = BEA_CEA_Code imp_rate) mean=imp_rate;
run;

proc sort data=resids5_CB&year._tfp_stats;
 by BEA_CEA_Code;
run;

data resids5_CB&year._tfp_stats;
merge resids5_CB&year._tfp_stats CEA_imp_rates&year;
 by BEA_CEA_Code; 
run;

proc means data=resids5_CB&year._tfp_stats mean stddev qrange p10 p90;
 var imp_rate;
title1 "CEA-level imputation rates, CB data, 20&year";
title2 "CEAs with >= 5 concrete plants with TFP";
run;


%MEND;

%MACRO get_and_merge_var_imp_rates(data=,year=);

proc sort data=concrete&year._w_imp_flags;
 by BEA_CEA_Code;
run;

proc means data=concrete&year._w_imp_flags mean NOPRINT;
 var tvs_imp cm_imp cf_imp ee_imp ph_imp tab_imp;
 by BEA_CEA_Code;
output out=var_imp_rates&year mean(tvs_imp)=tvs_imprate mean(cm_imp)=cm_imprate mean(cf_imp)=cf_imprate mean(ee_imp)=ee_imprate mean(ph_imp)=ph_imprate mean(tab_imp)=tab_imprate;
run;

proc sort data=resids5_&data.&year._tfp_stats;
 by BEA_CEA_Code;
run;
data resids5_&data.&year._tfp_stats;
 merge resids5_&data.&year._tfp_stats (in=intfp) var_imp_rates&year (in=inimp);
by BEA_CEA_Code;
if intfp and inimp;
run;

proc means data=resids5_&data.&year._tfp_stats mean stddev qrange p10 p90;
 var tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate;
title1 "CEA-level imputation rates, &data data, 20&year";
title2 "CEAs with >= 5 concrete plants with TFP";
run;

%MEND;

%MACRO merge_var_imp_rates_real(data=);

data var_imp_rates02;
 set var_imp_rates02;
 year=2002;
run;
data var_imp_rates07;
 set var_imp_rates07;
 year=2007;
run;


data var_imp_rates;
 set var_imp_rates02 
     var_imp_rates07;
run;

proc sort data=var_imp_rates;
 by year BEA_CEA_Code;
run;

proc sort data=resids5_&data._tfp_stats;
 by year BEA_CEA_Code;
run;

data resids5_&data._tfp_stats;
 merge resids5_&data._tfp_stats (in=intfp) var_imp_rates (in=inimp);
by year BEA_CEA_Code;
if intfp and inimp;
run;

data CEA_imp_rates02;
set CEA_imp_rates02;
 year = 2002;
run;

data CEA_imp_rates07;
set CEA_imp_rates07;
 year = 2007;
run;

data CEA_imp_rates;
 set CEA_imp_rates02
     CEA_imp_rates07;
run;

proc sort data=CEA_imp_rates;
 by year BEA_CEA_Code;
run;


data resids5_&data._tfp_stats;
 merge resids5_&data._tfp_stats (in=intfp) CEA_imp_rates (in=inimp);
by year BEA_CEA_Code;
if intfp and inimp;
run;

%MEND;

%MACRO ddensity_regressions(data=,year=,var=);

/** Merge the CEA concrete demand density variable with the CEA-level TFP statistics: */

proc sort data=concrete_&data.&year nodupkey out=Ddensity&data.&year (keep = BEA_CEA_Code CEA_ddensity&year);
 by BEA_CEA_Code;
run;
proc sort data=resids5_&data.&year._tfp_stats;
 by BEA_CEA_Code;
run;

data resids5_&data.&year._tfp_stats;
 merge resids5_&data.&year._tfp_stats (in=inresids) Ddensity&data.&year (in=inDD);
by BEA_CEA_Code;
if inresids and inDD; 
 lnCEADD = log(CEA_ddensity&year);
run;

proc means data=resids5_&data.&year._tfp_stats mean stddev qrange p10 p90;
 var lnCEADD CEA_ddensity&year;
title1 "CEA-level concrete demand density, &data data, year=&year";
title2 "CEAs with >= 5 concrete plants with TFP";
run;

/** Regress the CEA-level TFP statistics on CEA-level "concrete demand density". */

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD / WHITE; 
 title1 "TFP IQR on concrete demand density, using &data data, year=&year";
 title2 "CEAs with >= 5 plants with TFP";
run;


proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD / WHITE; 
where count ge 10;
 title1 "TFP IQR on concrete demand density, using &data data, year=&year";
 title2 "CEAs with >= 10 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD imp_rate / WHITE; 
 title1 "TFP IQR on concrete demand density and imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD tvs_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and TVS imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD cm_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and CM imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD EE_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and EE imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD cf_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and CF imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD ph_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and PH imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD tab_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and TAB imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and variable imputation rates"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;


proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD imp_rate / WHITE; 
where count ge 10;
 title1 "TFP IQR on concrete demand density and imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 10 plants with TFP";
run;


proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = imp_rate / WHITE; 
 title1 "TFP IQR on imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model ltfp_iqr_&var = imp_rate / WHITE; 
where count ge 10;
 title1 "TFP IQR on imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 10 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model lnCEADD = imp_rate / WHITE; 
 title1 "concrete demand density on imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
IQR: model lnCEADD = imp_rate / WHITE; 
where count ge 10;
 title1 "concrete demand density on imputation rate"; 
 title2 "&data data, year=&year, CEAs with >= 10 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
TFP_median: model ltfp_med_&var = lnCEADD / WHITE; 
 title1 "Median TFP on concrete demand density, using &data data, year=&year";
 title2 "CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
TFP_mean: model ltfp_wtdmean_&var = lnCEADD / WHITE; 
 title1 "Weighted mean TFP on concrete demand density, using &data data, year=&year";
 title2 "CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
TFP_10p: model ltfp_p10_&var = lnCEADD / WHITE; 
 title1 "10th percentile of TFP on concrete demand density, using &data data, year=&year";
 title2 "CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data.&year._tfp_stats;
ltvsmean: model lnCEAtvsmean = lnCEADD / WHITE; 
 title1 "Log of average TVS on concrete demand density, using &data data, year=&year";
 title2 "CEAs with >= 5 plants with TFP";
run;


%MEND;

%MACRO real_ddensity_regressions(data=,year=,var=);

/** Merge the CEA concrete demand density variable with the CEA-level TFP statistics: */

proc sort data=concrete_&data.&year nodupkey out=Ddensity&data (keep = year BEA_CEA_Code CEA_ddensity dummy07);
 by year BEA_CEA_Code;
run;
proc sort data=resids5_&data._tfp_stats;
 by year BEA_CEA_Code;
run;

data resids5_&data._tfp_stats;
 merge resids5_&data._tfp_stats (in=inresids) Ddensity&data (in=inDD);
by year BEA_CEA_Code;
if inresids and inDD; 
 lnCEADD = log(CEA_ddensity);
run;


proc means data=resids5_&data._tfp_stats mean stddev qrange p10 p90;
 var lnCEADD CEA_ddensity;
title1 "CEA-level concrete demand density, &data data, 2002 and 2007 pooled";
title2 "CEAs with >= 5 concrete plants with TFP";
run;


/** Regress the CEA-level TFP statistics on CEA-level "concrete demand density". */

proc reg data= resids5_&data._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD / WHITE; 
 title1 "TFP IQR on concrete demand density, ";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD dummy07 / WHITE; 
 title1 "TFP IQR on concrete demand density and year dummy";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
IQR: model ltfp_iqr_&var = lnCEADD dummy07 tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate / WHITE; 
 title1 "TFP IQR on concrete demand density and imputation rates";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
IQR: model lnCEADD = tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate / WHITE; 
 title1 "concrete demand density on variable imputation rates "; 
 title2 "&data data, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
IQR: model lnCEADD = imp_rate  / WHITE; 
 title1 "concrete demand density on CEA imputation rate (any variable)"; 
 title2 "&data data, CEAs with >= 5 plants with TFP";
run;


proc reg data= resids5_&data._tfp_stats;
TFP_median: model ltfp_med_&var = lnCEADD / WHITE; 
 title1 "Median TFP on concrete demand density, ";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
TFP_median: model ltfp_med_&var = lnCEADD dummy07 / WHITE; 
 title1 "Median TFP on concrete demand density and year dummy";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
TFP_median: model ltfp_med_&var = lnCEADD dummy07 tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate / WHITE; 
 title1 "Median TFP on concrete demand density and imputation rates";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;


proc reg data= resids5_&data._tfp_stats;
TFP_mean: model ltfp_wtdmean_&var = lnCEADD / WHITE; 
 title1 "Weighted mean TFP on concrete demand density, ";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
TFP_mean: model ltfp_wtdmean_&var = lnCEADD dummy07 / WHITE; 
 title1 "Weighted mean TFP on concrete demand density and year dummy";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
TFP_mean: model ltfp_wtdmean_&var = lnCEADD dummy07 tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate / WHITE; 
 title1 "Weighted mean TFP on concrete demand density and imputation rates";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
TFP_10p: model ltfp_p10_&var = lnCEADD / WHITE; 
 title1 "10th percentile of TFP on concrete demand density, ";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
TFP_10p: model ltfp_p10_&var = lnCEADD dummy07 / WHITE; 
 title1 "10th percentile of TFP on concrete demand density and year dummy";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
TFP_10p: model ltfp_p10_&var = lnCEADD dummy07 tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate / WHITE; 
 title1 "10th percentile of TFP on concrete demand density and imputation rates";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
ltvsmean: model lnCEAyrqmean = lnCEADD  / WHITE; 
 title1 "Log of average output on concrete demand density, ";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
ltvsmean: model lnCEAyrqmean = lnCEADD dummy07 / WHITE; 
 title1 "Log of average output on concrete demand density and year dummy";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;

proc reg data= resids5_&data._tfp_stats;
ltvsmean: model lnCEAyrqmean = lnCEADD dummy07 tvs_imprate cm_imprate ee_imprate cf_imprate ph_imprate tab_imprate / WHITE; 
 title1 "Log of average output on concrete demand density, ";
 title2 "using &data data, year=&year, CEAs with >= 5 plants with TFP";
run;


%MEND;


/* Estimate production functions using the CART-completed samples.
   Combine the parameter estimates from the multiple implicates and compute overall standard errors using
   Rubin's combining formulas. */

%MACRO estimate_pf_on_CARTdata(year=);

proc datasets library =work;
modify concrete_CART&year;
rename _IMPUTE_ = _IMPUTATION_;
RUN;

proc sort data=concrete_CART&year;
 by _IMPUTATION_;
run;


proc reg data=concrete_CART&year outest=outreg_concrete&year covout noprint;
QKLEM: model lnQ = lK lth lE lM; 
CRS: test lK+lth+lE+lM=1;
 output out=concrete_resids_CART&year
       residual=uhat
       predicted=lnQhat;
by  _IMPUTATION_;
run;

proc mianalyze data=outreg_concrete&year;
 modeleffects lK lth LE LM ;
title1 "Production function regressions, CART-completed data";
title2 "20&year concrete manufacturing";
run;

/* Merge industry average cost shares with plant-level residuals data 
   and compute residuals using industry cost-shares for output elasticities.*/

data concrete_resids_CART&year;
 if _N_ = 1 then set concrete_cs_&year;
 set concrete_resids_CART&year;
 cs_resid =  lnQ - ((iake + iaks)*lK + ial*lth + iae*lE + iam*lM);
run; 


%MEND;


/* Estimate TFP dispersion using the CART-completed samples. **/

%MACRO estimate_TFP_on_CARTdata(year=,var=);

proc sort data=concrete_resids_CART&year;
 by  _IMPUTATION_ BEA_CEA_Code;
run;


/* Keep only the plants for which we can compute the TFP residual. */
/* Set a dummy variable for counting. */
data concrete_resids_CART&year;
 set concrete_resids_CART&year;
 tfp_f=1;
 if &var ne . ;
run;

/** Create a count of plants with TFP within each CEA, 
    and merge the counts back with the residuals. 
**/
proc freq data=concrete_resids_CART&year;
by _IMPUTATION_ BEA_CEA_Code;
 tables tfp_f / out=resid_counts_CART&year NOPRINT;
run;


data resid_counts_CART&year (keep = _IMPUTATION_ BEA_CEA_Code count);
 set resid_counts_CART&year;
run;

data concrete_resids_CART&year (drop = tfp_f);
 merge concrete_resids_CART&year resid_counts_CART&year;
 by _IMPUTATION_ BEA_CEA_Code;
run;

/* Compute TFP dispersion statistics for the entire ready-mix concrete industry using CART-completed data */

proc means data=concrete_resids_CART&year p10 q1 q3 p90 stddev NOPRINT;
 var &var;
 by _IMPUTATION_;
 output out=concrete_resids_CART&year._iqrs (keep = _IMPUTATION_ ltfp_p10_&var ltfp_q1_&var ltfp_q3_&var ltfp_p90_&var ltfp_sd_&var) p10=ltfp_p10_&var q1=ltfp_q1_&var q3 = ltfp_q3_&var p90=ltfp_p90_&var stddev=ltfp_sd_&var;
 run;

data concrete_resids_CART&year._iqrs;
 set concrete_resids_CART&year._iqrs;
 ltfp_iqr_&var = ltfp_q3_&var - ltfp_q1_&var;
 ltfp_90_10_&var = ltfp_p90_&var - ltfp_p10_&var;
 tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
 tfp_90_10_ratio_&var = exp(ltfp_90_10_&var);
run;

ODS LISTING;


proc means data=concrete_resids_CART&year._iqrs mean stddev;
 var ltfp_sd_&var ltfp_iqr_&var ltfp_90_10_&var tfp_75_25_ratio_&var tfp_90_10_ratio_&var;
title1 "Measures of TFP dispersion, 20&year CART-completed data";
title2 "20-implicate means and between-imputation std. devs., concrete manufacturing";
run;


/* Now keep only the plants in CEAs with at least 5 plants. */

data resids5_CART&year;
 set concrete_resids_CART&year;
if count ge 5;
run;

proc sort data=resids5_CART&year;
 by _IMPUTATION_ BEA_CEA_Code;
run;

/* Compute the 10th percentile, the interquartile range, and the median of each within-CEA TFP distribution. */
proc means data=resids5_CART&year p10 q1 median q3  NOPRINT;
 var &var;
 by _IMPUTATION_ BEA_CEA_Code;
 output out=resids5_CART&year._CEAiqrs (keep = _IMPUTATION_ BEA_CEA_Code ltfp_p10_&var ltfp_q1_&var ltfp_med_&var ltfp_q3_&var ) p10=ltfp_p10_&var q1=ltfp_q1_&var median=ltfp_med_&var q3 = ltfp_q3_&var ;
run;

data resids5_CART&year._CEAiqrs;
 set resids5_CART&year._CEAiqrs;
 ltfp_iqr_&var = ltfp_q3_&var - ltfp_q1_&var;
 tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
run;

/* Construct within-CEA TVS share weights. */

proc sort data=resids5_CART&year;
 by _IMPUTATION_ BEA_CEA_Code;
run;

proc means data=resids5_CART&year SUM NOPRINT;
 var TVS;
by _IMPUTATION_ BEA_CEA_Code;
 output out = CEA_tvs_totsCART&year (keep = _IMPUTATION_ BEA_CEA_Code CEAtvstot ) sum=CEAtvstot ; 
run;


data resids5_CART&year;
 merge resids5_CART&year CEA_tvs_totsCART&year;
by _IMPUTATION_ BEA_CEA_Code;
 CEAtvssh = tvs/CEAtvstot;
run;

/* Compute within-CEA tvs-share-weighted mean TFP. */

proc means data=resids5_CART&year mean NOPRINT;
 var &var;
 weight CEAtvssh;
by _IMPUTATION_ BEA_CEA_Code;
 output out=resids5_CART&year._tfpmean (keep = _IMPUTATION_ BEA_CEA_Code ltfp_wtdmean_&var) mean(&var)= ltfp_wtdmean_&var;
run;

proc means data=resids5_CART&year mean NOPRINT;
 var TVS;
by _IMPUTATION_ BEA_CEA_Code;
 output out=resids5_CART&year._TVSmean (keep = _IMPUTATION_ BEA_CEA_Code CEAtvsmean) mean=CEAtvsmean;
run;

/*  Merge CEA tfp means with TFP IQRs, medians, and 10th percentiles. */
data resids5_CART&year._tfp_stats;
 merge resids5_CART&year._tfpmean resids5_CART&year._CEAiqrs resids5_CART&year._TVSmean;
by _IMPUTATION_ BEA_CEA_code;
 lnCEAtvsmean = log(CEAtvsmean);
run;

/* Compute descriptive statistics for CEA TFP stats. */ 
/* Mean: */
proc means data=resids5_CART&year._tfp_stats mean NOPRINT;
 var ltfp_iqr_&var ltfp_med_&var ltfp_wtdmean_&var ltfp_p10_&var lnCEAtvsmean CEAtvsmean;
by _IMPUTATION_;
output out=resids5_CART&year._statmeans mean(ltfp_iqr_&var)=ltfp_iqr_mean_&var mean(ltfp_med_&var)=ltfp_med_mean_&var mean(ltfp_wtdmean_&var)=ltfp_wtdmean_mean_&var mean(ltfp_p10_&var)=ltfp_p10_mean_&var mean(lnCEAtvsmean)=lnCEAtvsmean_mean;
run;

/* S.D.: */
proc means data=resids5_CART&year._tfp_stats stddev NOPRINT;
 var ltfp_iqr_&var ltfp_med_&var ltfp_wtdmean_&var ltfp_p10_&var lnCEAtvsmean;
by _IMPUTATION_;
output out=resids5_CART&year._statSDs stddev(ltfp_iqr_&var)=ltfp_iqr_sd_&var stddev(ltfp_med_&var)=ltfp_med_sd_&var stddev(ltfp_wtdmean_&var)=ltfp_wtdmean_sd_&var stddev(ltfp_p10_&var)=ltfp_p10_sd_&var;
run;


/*
data resids5_CART&year._tfp_summ;
 set resids5_CART&year._tfp_summ;
 p90_p10_range = p90 - p10;
run;
*/

proc means data=resids5_CART&year._statmeans mean stddev ;
var ltfp_iqr_mean_&var ltfp_med_mean_&var ltfp_wtdmean_mean_&var ltfp_p10_mean_&var lnCEAtvsmean_mean;
 title1 "CEA-level TFP statistics, CART-completed data, 20&year";
 title2 "CEAs with >= 5 plants with TFP";
 title3 "20 implicate means and s.d.s of CEA means";
run;

proc means data=resids5_CART&year._statSDs mean stddev ;
var ltfp_iqr_sd_&var ltfp_med_sd_&var ltfp_wtdmean_sd_&var ltfp_p10_sd_&var;
 title1 "CEA-level TFP statistics, CART-completed data, 20&year";
 title2 "CEAs with >= 5 plants with TFP";
 title3 "20 implicate means and s.d.s of CEA s.d.s ";
run;

%MEND;


%include "estimate_real_CART_tfp_stats_MACRO.SAS";
 

%MACRO ddensity_regressions_CART(year=,var=);


proc sort data=resids5_CART&year._tfp_stats;
 by  _IMPUTATION_;
run;

proc reg data= resids5_CART&year._tfp_stats OUTEST=outregIQR_CART&year covout noprint;
IQR: model ltfp_iqr_&var = lnCEADD / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregIQR_CART&year;
 modeleffects lnCEADD ;
 title1 "TFP IQR on concrete demand density, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;


proc reg data= resids5_CART&year._tfp_stats OUTEST=outregIQR_CART&year covout noprint;
IQR: model ltfp_iqr_&var = lnCEADD dummy07 / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregIQR_CART&year;
 modeleffects lnCEADD dummy07;
 title1 "TFP IQR on concrete demand density and year dummy, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;


proc reg data= resids5_CART&year._tfp_stats OUTEST=outregMED_CART&year covout noprint;
TFP_median: model ltfp_med_&var = lnCEADD  / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregMED_CART&year;
 modeleffects lnCEADD ;
 title1 "Median TFP on concrete demand density, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;


proc reg data= resids5_CART&year._tfp_stats OUTEST=outregMED_CART&year covout noprint;
TFP_median: model ltfp_med_&var = lnCEADD dummy07 / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregMED_CART&year;
 modeleffects lnCEADD dummy07;
 title1 "Median TFP on concrete demand density and year dummy, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;


proc reg data= resids5_CART&year._tfp_stats OUTEST=outregMEAN_CART&year covout noprint;
TFP_mean: model ltfp_wtdmean_&var = lnCEADD / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregMEAN_CART&year;
 modeleffects lnCEADD ;
 title1 "Weighted mean TFP on concrete demand density, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;

proc reg data= resids5_CART&year._tfp_stats OUTEST=outregMEAN_CART&year covout noprint;
TFP_mean: model ltfp_wtdmean_&var = lnCEADD dummy07 / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregMEAN_CART&year;
 modeleffects lnCEADD dummy07;
 title1 "Weighted mean TFP on concrete demand density and year dummy, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;


proc reg data= resids5_CART&year._tfp_stats OUTEST=outregP10_CART&year covout noprint;
TFP_10p: model ltfp_p10_&var = lnCEADD / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregP10_CART&year;
 modeleffects lnCEADD ;
 title1 "10th percentile of TFP on concrete demand density, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;

proc reg data= resids5_CART&year._tfp_stats OUTEST=outregP10_CART&year covout noprint;
TFP_10p: model ltfp_p10_&var = lnCEADD dummy07 / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregP10_CART&year;
 modeleffects lnCEADD dummy07;
 title1 "10th percentile of TFP on concrete demand density and year dummy, ";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;


proc reg data= resids5_CART&year._tfp_stats OUTEST=outregTVSmean_CART&year covout noprint;
ltvsmean: model lnCEAtvsmean = lnCEADD / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregTVSmean_CART&year;
 modeleffects lnCEADD ;
 title1 "Log of mean real TVS on concrete demand density";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;

proc reg data= resids5_CART&year._tfp_stats OUTEST=outregTVSmean_CART&year covout noprint;
ltvsmean: model lnCEAtvsmean = lnCEADD dummy07 / WHITE; 
by  _IMPUTATION_;
run;

proc mianalyze data=outregTVSmean_CART&year;
 modeleffects lnCEADD dummy07 ;
 title1 "Log of mean real TVS on concrete demand density and year dummy";
 title2 "using year=&year CART data, CEAs with >= 5 plants with TFP=&var";
run;


%MEND;


/*********************/
/**  BEGIN PROGRAM ***/
/*********************/


DATA concrete_CB02;
 SET concrete.concrete_02_w_cb_imputes;
run;

%construct_pf_vars(data=CB,yr=02);

DATA concrete_good02;
 SET concrete.concrete_02_gooddata;
run;

%construct_pf_vars(data=good,yr=02);


DATA concrete_CB07;
 SET concrete.concrete_07_w_cb_imputes;
run;

proc datasets library=work;
 modify concrete_CB07;
 rename CEA_ddensity=CEA_ddensity07;
run;

%construct_pf_vars(data=CB,yr=07);

DATA concrete_good07;
 SET concrete.concrete_07_gooddata;
run;

proc datasets library=work;
 modify concrete_good07;
 rename CEA_ddensity=CEA_ddensity07;
run;

%construct_pf_vars(data=good,yr=07);

DATA concrete_CART02;
 SET concrete.concrete_imputes02;
run;

%construct_pf_vars(data=CART,yr=02);

DATA concrete_CART07;
 SET concrete.concrete_imputes07;
run;

proc datasets library=work;
 modify concrete_CART07;
 rename CEA_ddensity=CEA_ddensity07;
run;

%construct_pf_vars(data=CART,yr=07);


%estimate_pf_single(data=good,year=02);
%estimate_pf_single(data=CB,year=02);
%estimate_pf_on_CARTdata(year=02);

%estimate_pf_single(data=good,year=07);
%estimate_pf_single(data=CB,year=07);
%estimate_pf_on_CARTdata(year=07);

%estimate_TFP_on_single(data=good,year=02,var=uhat);
%estimate_TFP_on_single(data=CB,year=02,var=uhat);
%estimate_TFP_on_CARTdata(year=02,var=uhat);

%estimate_TFP_on_single(data=good,year=07,var=uhat);
%estimate_TFP_on_single(data=CB,year=07,var=uhat);
%estimate_TFP_on_CARTdata(year=07,var=uhat);

%Construct_CEA_imp_rate(year=02);
%Construct_CEA_imp_rate(year=07);


**%ddensity_regressions(data=good,year=02,var=uhat);

data concrete02_w_imp_flags (keep = BEA_CEA_Code tvs_imp cm_imp cf_imp ee_imp ph_imp tab_imp);
 set concrete.concrete_02_w_CB_imputes;
run;

%get_and_merge_var_imp_rates(data=CB,year=02);
**%ddensity_regressions(data=CB,year=02,var=uhat);
**%ddensity_regressions_CART(year=02,var=uhat);

**%ddensity_regressions(data=good,year=07,var=uhat);

data concrete07_w_imp_flags (keep = BEA_CEA_Code tvs_imp cm_imp cf_imp ee_imp ph_imp tab_imp);
 set concrete.concrete_07_w_CB_imputes;
run;

%get_and_merge_var_imp_rates(data=CB,year=07);
**%ddensity_regressions(data=CB,year=07,var=uhat);
**%ddensity_regressions_CART(year=07,var=uhat);


proc datasets libary=work;
 delete 
 resid_counts_good02
 resid_counts_good07       
 resid_counts_CB02
 resid_counts_CB07       
 resid_counts_CART02
 resid_counts_CART07       
 concrete_resids_CB02_iqrs
 concrete_resids_CB07_iqrs
 concrete_resids_good02_iqrs
 concrete_resids_good07_iqrs
 concrete_resids_CART02_iqrs
 concrete_resids_CART07_iqrs
 resids5_CB02
 resids5_CB07
 resids5_good02
 resids5_good07
 resids5_CART02
 resids5_CART07
 resids5_CB02_CEAiqrs
 resids5_CB07_CEAiqrs
 resids5_good02_CEAiqrs
 resids5_good07_CEAiqrs
 resids5_CART02_CEAiqrs
 resids5_CART07_CEAiqrs
 CEA_tvs_totsCB02
 CEA_tvs_totsCB07
 CEA_tvs_totsgood02
 CEA_tvs_totsgood07
 CEA_tvs_totsCART02
 CEA_tvs_totsCART07
 resids5_CB02_tfpmean
 resids5_CB07_tfpmean
 resids5_good02_tfpmean
 resids5_good07_tfpmean
 resids5_CART02_tfpmean
 resids5_CART07_tfpmean
 resids5_CB02_tfp_stats
 resids5_CB07_tfp_stats
 resids5_good02_tfp_stats
 resids5_good07_tfp_stats
 resids5_CART02_tfp_stats
 resids5_CART07_tfp_stats
;
run;


/* Calculate cost-share TFP residuals
   from real Bureau-completed panel.
   Calculate residuals for 2002 and 2007.
*/

data concrete_realCB_imp02;
 set concrete.concreteCB_imp0207_panel;
 if year=2002;
run;

%estimate_pf_single(data=realCB_imp,year=02);

data concrete_realCB_imp07;
 set concrete.concreteCB_imp0207_panel;
 if year=2007;
run;

%estimate_pf_single(data=realCB_imp,year=07);


/* Stack the real TFP residuals for 2002 and 2007 */
data concrete_resids_realCB_imp;
 set concrete_resids_realCB_imp02
     concrete_resids_realCB_imp07;
run;

/* Calculate cost-share TFP residuals for 2002 and 2007
   from real CART-completed panel. */

data concrete_CART02;
 set concrete.concreteCART0207_panel;
 if year=2002;
run;

%estimate_pf_on_CARTdata(year=02);

data concrete_CART07;
 set concrete.concreteCART0207_panel;
 if year=2007;
run;

%estimate_pf_on_CARTdata(year=07);

/* Stack the real TFP residuals from the 2002 and 2007 CART data*/

data concrete_resids_realCART;
 set concrete_resids_CART02
     concrete_resids_CART07;
run;

%estimate_TFP_by_CEAyear(data=realCB_imp,var=cs_resid);
%estimate_TFP_by_CEAyearCART(data=realCART,var=cs_resid,M=20);


 %estimate_TFP_on_single(data=good,year=02,var=cs_resid);
 %estimate_TFP_on_single(data=CB,year=02,var=cs_resid);
 %estimate_TFP_on_CARTdata(year=02,var=cs_resid);

 %estimate_TFP_on_single(data=good,year=07,var=cs_resid);
 %estimate_TFP_on_single(data=CB,year=07,var=cs_resid);
 %estimate_TFP_on_CARTdata(year=07,var=cs_resid);

data concrete_realCB_imp0207;
 set concrete.concreteCB_imp0207_panel;
 if year=2002 or year=2007;
 if year=2007 then dummy07 = 1; 
 else dummy07=0;
run;

%merge_var_imp_rates_real(data=realCB_imp);

%real_ddensity_regressions(data=realCB_imp,year=0207,var=cs_resid);


data resids5_CART0207_tfp_stats;
 set resids5_realCART_tfp_stats;
run;

data concrete_CART0207;
 set concrete.concreteCART0207_panel;
 if year=2002 or year=2007;
run;

proc sort data=concrete_CART0207 nodupkey out=DdensityCART0207 (keep = year BEA_CEA_Code CEA_ddensity);
 by year BEA_CEA_Code;
run;
proc sort data=resids5_CART0207_tfp_stats;
 by year BEA_CEA_Code;
run;
data resids5_CART0207_tfp_stats;
 merge resids5_CART0207_tfp_stats (in=inresids) DdensityCART0207 (in=inDD);
by year BEA_CEA_Code;
if inresids and inDD;
 lnCEADD = log(CEA_ddensity);
 if year=2007 then dummy07=1;
 else dummy07=0;
 lnCEAtvsmean = lnCEAqmean;
run;

%ddensity_regressions_CART(year=0207,var=cs_resid);

**%ddensity_regressions(data=good,year=02,var=cs_resid);
%get_and_merge_var_imp_rates(data=CB,year=02);
**%ddensity_regressions(data=CB,year=02,var=cs_resid);

/** Merge the 2002 CEA concrete demand density variable with the 2002 CEA-level TFP statistics: */

proc sort data=concrete_CART02 nodupkey out=DdensityCART02 (keep = BEA_CEA_Code CEA_ddensity02);
 by BEA_CEA_Code;
run;
proc sort data=resids5_CART02_tfp_stats;
 by BEA_CEA_Code;
run;
data resids5_CART02_tfp_stats;
 merge resids5_CART02_tfp_stats (in=inresids) DdensityCART02 (in=inDD);
by BEA_CEA_Code;
if inresids and inDD;
 lnCEADD = log(CEA_ddensity02);
run;


**%ddensity_regressions_CART(year=02,var=cs_resid);

**%ddensity_regressions(data=good,year=07,var=cs_resid);
%get_and_merge_var_imp_rates(data=CB,year=07);
**%ddensity_regressions(data=CB,year=07,var=cs_resid);

/** Merge the 2007 CEA concrete demand density variable with the 2007 CEA-level TFP statistics: */

proc sort data=concrete_CART07 nodupkey out=DdensityCART07 (keep = BEA_CEA_Code CEA_ddensity07);
 by BEA_CEA_Code;
run;
proc sort data=resids5_CART07_tfp_stats;
 by BEA_CEA_Code;
run;
data resids5_CART07_tfp_stats;
 merge resids5_CART07_tfp_stats (in=inresids) DdensityCART07 (in=inDD);
by BEA_CEA_Code;
if inresids and inDD;
 lnCEADD = log(CEA_ddensity07);
run;

**%ddensity_regressions_CART(year=07,var=cs_resid);
